home *** CD-ROM | disk | FTP | other *** search
- Subject: v17i099: Gnu E?GREP (it's fast), Part02/05
- Newsgroups: comp.sources.unix
- Sender: sources
- Approved: rsalz@uunet.UU.NET
-
- Submitted-by: Mike Haertel <mike@wheaties.ai.mit.edu>
- Posting-number: Volume 17, Issue 99
- Archive-name: gnugrep/part02
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then unpack
- # it by saving it into a file and typing "sh file". To overwrite existing
- # files, type "sh file -c". You can also feed this as standard input via
- # unshar, or by typing "sh <file", e.g.. If this archive is complete, you
- # will see the following message at the end:
- # "End of shell archive."
- # Contents: dfa.h grep.c
- # Wrapped by mike@odin on Wed Dec 14 21:25:47 1988
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'dfa.h' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'dfa.h'\"
- else
- echo shar: Extracting \"'dfa.h'\" \(22427 characters\)
- sed "s/^X//" >'dfa.h' <<'END_OF_FILE'
- X/* dfa.h - declarations for GNU deterministic regexp compiler
- X Copyright (C) 1988 Free Software Foundation, Inc.
- X Written June, 1988 by Mike Haertel
- X
- X NO WARRANTY
- X
- X BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
- XNO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
- XWHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
- XRICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
- XWITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
- XBUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- XFITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
- XAND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
- XDEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
- XCORRECTION.
- X
- X IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
- XSTALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
- XWHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
- XLIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
- XOTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
- XUSE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
- XDATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
- XA FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
- XPROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
- XDAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- X
- X GENERAL PUBLIC LICENSE TO COPY
- X
- X 1. You may copy and distribute verbatim copies of this source file
- Xas you receive it, in any medium, provided that you conspicuously and
- Xappropriately publish on each copy a valid copyright notice "Copyright
- X (C) 1988 Free Software Foundation, Inc."; and include following the
- Xcopyright notice a verbatim copy of the above disclaimer of warranty
- Xand of this License. You may charge a distribution fee for the
- Xphysical act of transferring a copy.
- X
- X 2. You may modify your copy or copies of this source file or
- Xany portion of it, and copy and distribute such modifications under
- Xthe terms of Paragraph 1 above, provided that you also do the following:
- X
- X a) cause the modified files to carry prominent notices stating
- X that you changed the files and the date of any change; and
- X
- X b) cause the whole of any work that you distribute or publish,
- X that in whole or in part contains or is a derivative of this
- X program or any part thereof, to be licensed at no charge to all
- X third parties on terms identical to those contained in this
- X License Agreement (except that you may choose to grant more extensive
- X warranty protection to some or all third parties, at your option).
- X
- X c) You may charge a distribution fee for the physical act of
- X transferring a copy, and you may at your option offer warranty
- X protection in exchange for a fee.
- X
- XMere aggregation of another unrelated program with this program (or its
- Xderivative) on a volume of a storage or distribution medium does not bring
- Xthe other program under the scope of these terms.
- X
- X 3. You may copy and distribute this program or any portion of it in
- Xcompiled, executable or object code form under the terms of Paragraphs
- X1 and 2 above provided that you do the following:
- X
- X a) accompany it with the complete corresponding machine-readable
- X source code, which must be distributed under the terms of
- X Paragraphs 1 and 2 above; or,
- X
- X b) accompany it with a written offer, valid for at least three
- X years, to give any third party free (except for a nominal
- X shipping charge) a complete machine-readable copy of the
- X corresponding source code, to be distributed under the terms of
- X Paragraphs 1 and 2 above; or,
- X
- X c) accompany it with the information you received as to where the
- X corresponding source code may be obtained. (This alternative is
- X allowed only for noncommercial distribution and only if you
- X received the program in object code or executable form alone.)
- X
- XFor an executable file, complete source code means all the source code for
- Xall modules it contains; but, as a special exception, it need not include
- Xsource code for modules which are standard libraries that accompany the
- Xoperating system on which the executable file runs.
- X
- X 4. You may not copy, sublicense, distribute or transfer this program
- Xexcept as expressly provided under this License Agreement. Any attempt
- Xotherwise to copy, sublicense, distribute or transfer this program is void and
- Xyour rights to use the program under this License agreement shall be
- Xautomatically terminated. However, parties who have received computer
- Xsoftware programs from you with this License Agreement will not have
- Xtheir licenses terminated so long as such parties remain in full compliance.
- X
- X 5. If you wish to incorporate parts of this program into other free
- Xprograms whose distribution conditions are different, write to the Free
- XSoftware Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
- Xworked out a simple rule that can be stated here, but we will often permit
- Xthis. We will be guided by the two goals of preserving the free status of
- Xall derivatives our free software and of promoting the sharing and reuse of
- Xsoftware.
- X
- X
- XIn other words, you are welcome to use, share and improve this program.
- XYou are forbidden to forbid anyone else to use, share and improve
- Xwhat you give them. Help stamp out software-hoarding! */
- X
- X#ifdef __STDC__
- X
- X/* Missing include files for GNU C. */
- X/* #include <stdlib.h> */
- Xtypedef int size_t;
- Xextern void *calloc(int, size_t);
- Xextern void *malloc(size_t);
- Xextern void *realloc(void *, size_t);
- Xextern void free(void *);
- X
- X#ifndef USG
- Xextern char *strchr(), *strrchr(), *memcpy();
- X#else
- Xextern char *index();
- X#endif
- X
- Xextern char *bcopy(), *bzero();
- X
- X#ifdef SOMEDAY
- X#define ISALNUM(c) isalnum(c)
- X#define ISALPHA(c) isalpha(c)
- X#else
- X#define ISALNUM(c) (isascii(c) && isalnum(c))
- X#define ISALPHA(c) (isascii(c) && isalpha(c))
- X#endif
- X
- X#else /* ! __STDC__ */
- X
- X#define const
- Xtypedef int size_t;
- Xextern char *calloc(), *malloc(), *realloc();
- Xextern void free();
- X
- X#ifndef USG
- Xextern char *strchr(), *strrchr(), *memcpy();
- X#else
- Xextern char *index();
- X#endif
- X
- Xextern char *bcopy(), *bzero();
- X
- X#define ISALNUM(c) (isascii(c) && isalnum(c))
- X#define ISALPHA(c) (isascii(c) && isalpha(c))
- X
- X#endif /* ! __STDC__ */
- X
- X/* 1 means plain parentheses serve as grouping, and backslash
- X parentheses are needed for literal searching.
- X 0 means backslash-parentheses are grouping, and plain parentheses
- X are for literal searching. */
- X#define RE_NO_BK_PARENS 1
- X
- X/* 1 means plain | serves as the "or"-operator, and \| is a literal.
- X 0 means \| serves as the "or"-operator, and | is a literal. */
- X#define RE_NO_BK_VBAR 2
- X
- X/* 0 means plain + or ? serves as an operator, and \+, \? are literals.
- X 1 means \+, \? are operators and plain +, ? are literals. */
- X#define RE_BK_PLUS_QM 4
- X
- X/* 1 means | binds tighter than ^ or $.
- X 0 means the contrary. */
- X#define RE_TIGHT_VBAR 8
- X
- X/* 1 means treat \n as an _OR operator
- X 0 means treat it as a normal character */
- X#define RE_NEWLINE_OR 16
- X
- X/* 0 means that a special characters (such as *, ^, and $) always have
- X their special meaning regardless of the surrounding context.
- X 1 means that special characters may act as normal characters in some
- X contexts. Specifically, this applies to:
- X ^ - only special at the beginning, or after ( or |
- X $ - only special at the end, or before ) or |
- X *, +, ? - only special when not after the beginning, (, or | */
- X#define RE_CONTEXT_INDEP_OPS 32
- X
- X/* Now define combinations of bits for the standard possibilities. */
- X#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_CONTEXT_INDEP_OPS)
- X#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK | RE_NEWLINE_OR)
- X#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
- X#define RE_SYNTAX_EMACS 0
- X
- X/* The NULL pointer. */
- X#define NULL 0
- X
- X/* Number of bits in an unsigned char. */
- X#define CHARBITS 8
- X
- X/* First integer value that is greater than any character code. */
- X#define _NOTCHAR (1 << CHARBITS)
- X
- X/* INTBITS need not be exact, just a lower bound. */
- X#define INTBITS (CHARBITS * sizeof (int))
- X
- X/* Number of ints required to hold a bit for every character. */
- X#define _CHARSET_INTS ((_NOTCHAR + INTBITS - 1) / INTBITS)
- X
- X/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */
- Xtypedef int _charset[_CHARSET_INTS];
- X
- X/* The regexp is parsed into an array of tokens in postfix form. Some tokens
- X are operators and others are terminal symbols. Most (but not all) of these
- X codes are returned by the lexical analyzer. */
- X#ifdef __STDC__
- X
- Xtypedef enum
- X{
- X _END = -1, /* _END is a terminal symbol that matches the
- X end of input; any value of _END or less in
- X the parse tree is such a symbol. Accepting
- X states of the DFA are those that would have
- X a transition on _END. */
- X
- X /* Ordinary character values are terminal symbols that match themselves. */
- X
- X _EMPTY = _NOTCHAR, /* _EMPTY is a terminal symbol that matches
- X the empty string. */
- X
- X _BACKREF, /* _BACKREF is generated by \<digit>; it
- X it not completely handled. If the scanner
- X detects a transition on backref, it returns
- X a kind of "semi-success" indicating that
- X the match will have to be verified with
- X a backtracking matcher. */
- X
- X _BEGLINE, /* _BEGLINE is a terminal symbol that matches
- X the empty string if it is at the beginning
- X of a line. */
- X
- X _ALLBEGLINE, /* _ALLBEGLINE is a terminal symbol that
- X matches the empty string if it is at the
- X beginning of a line; _ALLBEGLINE applies
- X to the entire regexp and can only occur
- X as the first token thereof. _ALLBEGLINE
- X never appears in the parse tree; a _BEGLINE
- X is prepended with _CAT to the entire
- X regexp instead. */
- X
- X _ENDLINE, /* _ENDLINE is a terminal symbol that matches
- X the empty string if it is at the end of
- X a line. */
- X
- X _ALLENDLINE, /* _ALLENDLINE is to _ENDLINE as _ALLBEGLINE
- X is to _BEGLINE. */
- X
- X _BEGWORD, /* _BEGWORD is a terminal symbol that matches
- X the empty string if it is at the beginning
- X of a word. */
- X
- X _ENDWORD, /* _ENDWORD is a terminal symbol that matches
- X the empty string if it is at the end of
- X a word. */
- X
- X _LIMWORD, /* _LIMWORD is a terminal symbol that matches
- X the empty string if it is at the beginning
- X or the end of a word. */
- X
- X _NOTLIMWORD, /* _NOTLIMWORD is a terminal symbol that
- X matches the empty string if it is not at
- X the beginning or end of a word. */
- X
- X _QMARK, /* _QMARK is an operator of one argument that
- X matches zero or one occurences of its
- X argument. */
- X
- X _STAR, /* _STAR is an operator of one argument that
- X matches the Kleene closure (zero or more
- X occurrences) of its argument. */
- X
- X _PLUS, /* _PLUS is an operator of one argument that
- X matches the positive closure (one or more
- X occurrences) of its argument. */
- X
- X _CAT, /* _CAT is an operator of two arguments that
- X matches the concatenation of its
- X arguments. _CAT is never returned by the
- X lexical analyzer. */
- X
- X _OR, /* _OR is an operator of two arguments that
- X matches either of its arguments. */
- X
- X _LPAREN, /* _LPAREN never appears in the parse tree,
- X it is only a lexeme. */
- X
- X _RPAREN, /* _RPAREN never appears in the parse tree. */
- X
- X _SET /* _SET and (and any value greater) is a
- X terminal symbol that matches any of a
- X class of characters. */
- X} _token;
- X
- X#else /* ! __STDC__ */
- X
- Xtypedef short _token;
- X
- X#define _END -1
- X#define _EMPTY _NOTCHAR
- X#define _BACKREF (_EMPTY + 1)
- X#define _BEGLINE (_EMPTY + 2)
- X#define _ALLBEGLINE (_EMPTY + 3)
- X#define _ENDLINE (_EMPTY + 4)
- X#define _ALLENDLINE (_EMPTY + 5)
- X#define _BEGWORD (_EMPTY + 6)
- X#define _ENDWORD (_EMPTY + 7)
- X#define _LIMWORD (_EMPTY + 8)
- X#define _NOTLIMWORD (_EMPTY + 9)
- X#define _QMARK (_EMPTY + 10)
- X#define _STAR (_EMPTY + 11)
- X#define _PLUS (_EMPTY + 12)
- X#define _CAT (_EMPTY + 13)
- X#define _OR (_EMPTY + 14)
- X#define _LPAREN (_EMPTY + 15)
- X#define _RPAREN (_EMPTY + 16)
- X#define _SET (_EMPTY + 17)
- X
- X#endif /* ! __STDC__ */
- X
- X/* Sets are stored in an array in the compiled regexp; the index of the
- X array corresponding to a given set token is given by _SET_INDEX(t). */
- X#define _SET_INDEX(t) ((t) - _SET)
- X
- X/* Sometimes characters can only be matched depending on the surrounding
- X context. Such context decisions depend on what the previous character
- X was, and the value of the current (lookahead) character. Context
- X dependent constraints are encoded as 8 bit integers. Each bit that
- X is set indicates that the constraint succeeds in the corresponding
- X context.
- X
- X bit 7 - previous and current are newlines
- X bit 6 - previous was newline, current isn't
- X bit 5 - previous wasn't newline, current is
- X bit 4 - neither previous nor current is a newline
- X bit 3 - previous and current are word-constituents
- X bit 2 - previous was word-constituent, current isn't
- X bit 1 - previous wasn't word-constituent, current is
- X bit 0 - neither previous nor current is word-constituent
- X
- X Word-constituent characters are those that satisfy isalnum().
- X
- X The macro _SUCCEEDS_IN_CONTEXT determines whether a a given constraint
- X succeeds in a particular context. Prevn is true if the previous character
- X was a newline, currn is true if the lookahead character is a newline.
- X Prevl and currl similarly depend upon whether the previous and current
- X characters are word-constituent letters. */
- X#define _MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
- X ((constraint) & 1 << ((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)
- X#define _MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \
- X ((constraint) & 1 << ((prevl) ? 2 : 0) + ((currl) ? 1 : 0))
- X#define _SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \
- X (_MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
- X && _MATCHES_LETTER_CONTEXT(constraint, prevl, currl))
- X
- X/* The following macros give information about what a constraint depends on. */
- X#define _PREV_NEWLINE_DEPENDENT(constraint) \
- X (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30))
- X#define _PREV_LETTER_DEPENDENT(constraint) \
- X (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03))
- X
- X/* Tokens that match the empty string subject to some constraint actually
- X work by applying that constraint to determine what may follow them,
- X taking into account what has gone before. The following values are
- X the constraints corresponding to the special tokens previously defined. */
- X#define _NO_CONSTRAINT 0xff
- X#define _BEGLINE_CONSTRAINT 0xcf
- X#define _ENDLINE_CONSTRAINT 0xaf
- X#define _BEGWORD_CONSTRAINT 0xf2
- X#define _ENDWORD_CONSTRAINT 0xf4
- X#define _LIMWORD_CONSTRAINT 0xf6
- X#define _NOTLIMWORD_CONSTRAINT 0xf9
- X
- X/* States of the recognizer correspond to sets of positions in the parse
- X tree, together with the constraints under which they may be matched.
- X So a position is encoded as an index into the parse tree together with
- X a constraint. */
- Xtypedef struct
- X{
- X unsigned index:24, /* Index into the parse array. */
- X constraint:8; /* Constraint for matching this position. */
- X} _position;
- X
- X/* Sets of positions are stored as arrays. */
- Xtypedef struct
- X{
- X _position *elems; /* Elements of this position set. */
- X int nelem; /* Number of elements in this set. */
- X} _position_set;
- X
- X/* A state of the regexp consists of a set of positions, some flags,
- X and the token value of the lowest-numbered position of the state that
- X contains an _END token. */
- Xtypedef struct
- X{
- X int hash; /* Hash of the positions of this state. */
- X _position_set elems; /* Positions this state could match. */
- X unsigned newline:1, /* True if previous state matched newline. */
- X letter:1, /* True if previous state matched a letter. */
- X backref:1, /* True if this state matches a \<digit>. */
- X constraint:8; /* Constraint for this state to accept. */
- X int first_end; /* Token value of the first _END in elems. */
- X} _dfa_state;
- X
- X/* If an r.e. is at most MUST_MAX characters long, we look for a string which
- X must appear in it; whatever's found is dropped into the struct reg. */
- X
- X#define MUST_MAX 50
- X
- X/* A compiled regular expression. */
- Xstruct regexp
- X{
- X /* Stuff built by the scanner. */
- X _charset *charsets; /* Array of character sets for _SET tokens. */
- X int cindex; /* Index for adding new charsets. */
- X int calloc; /* Number of charsets currently allocated. */
- X
- X /* Stuff built by the parser. */
- X _token *tokens; /* Postfix parse array. */
- X int tindex; /* Index for adding new tokens. */
- X int talloc; /* Number of tokens currently allocated. */
- X int depth; /* Depth required of an evaluation stack
- X used for depth-first traversal of the
- X parse tree. */
- X int nleaves; /* Number of leaves on the parse tree. */
- X int nregexps; /* Count of parallel regexps being built
- X with regparse(). */
- X
- X /* Stuff owned by the state builder. */
- X _dfa_state *states; /* States of the regexp. */
- X int sindex; /* Index for adding new states. */
- X int salloc; /* Number of states currently allocated. */
- X
- X /* Stuff built by the structure analyzer. */
- X _position_set *follows; /* Array of follow sets, indexed by position
- X index. The follow of a position is the set
- X of positions containing characters that
- X could conceivably follow a character
- X matching the given position in a string
- X matching the regexp. Allocated to the
- X maximum possible position index. */
- X int searchflag; /* True if we are supposed to build a searching
- X as opposed to an exact matcher. A searching
- X matcher finds the first and shortest string
- X matching a regexp anywhere in the buffer,
- X whereas an exact matcher finds the longest
- X string matching, but anchored to the
- X beginning of the buffer. */
- X
- X /* Stuff owned by the executor. */
- X int tralloc; /* Number of transition tables that have
- X slots so far. */
- X int trcount; /* Number of transition tables that have
- X actually been built. */
- X int **trans; /* Transition tables for states that can
- X never accept. If the transitions for a
- X state have not yet been computed, or the
- X state could possibly accept, its entry in
- X this table is NULL. */
- X int **realtrans; /* Trans always points to realtrans + 1; this
- X is so trans[-1] can contain NULL. */
- X int **fails; /* Transition tables after failing to accept
- X on a state that potentially could do so. */
- X int *success; /* Table of acceptance conditions used in
- X regexecute and computed in build_state. */
- X int *newlines; /* Transitions on newlines. The entry for a
- X newline in any transition table is always
- X -1 so we can count lines without wasting
- X too many cycles. The transition for a
- X newline is stored separately and handled
- X as a special case. Newline is also used
- X as a sentinel at the end of the buffer. */
- X char must[MUST_MAX];
- X int mustn;
- X};
- X
- X/* Some macros for user access to regexp internals. */
- X
- X/* ACCEPTING returns true if s could possibly be an accepting state of r. */
- X#define ACCEPTING(s, r) ((r).states[s].constraint)
- X
- X/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the
- X specified context. */
- X#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, reg) \
- X _SUCCEEDS_IN_CONTEXT((reg).states[state].constraint, \
- X prevn, currn, prevl, currl)
- X
- X/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel
- X regexps that a given state could accept. Parallel regexps are numbered
- X starting at 1. */
- X#define FIRST_MATCHING_REGEXP(state, reg) (-(reg).states[state].first_end)
- X
- X/* Entry points. */
- X
- X#ifdef __STDC__
- X
- X/* Regsyntax() takes two arguments; the first sets the syntax bits described
- X earlier in this file, and the second sets the case-folding flag. */
- Xextern void regsyntax(int, int);
- X
- X/* Compile the given string of the given length into the given struct regexp.
- X Final argument is a flag specifying whether to build a searching or an
- X exact matcher. */
- Xextern void regcompile(const char *, size_t, struct regexp *, int);
- X
- X/* Execute the given struct regexp on the buffer of characters. The
- X first char * points to the beginning, and the second points to the
- X first character after the end of the buffer, which must be a writable
- X place so a sentinel end-of-buffer marker can be stored there. The
- X second-to-last argument is a flag telling whether to allow newlines to
- X be part of a string matching the regexp. The next-to-last argument,
- X if non-NULL, points to a place to increment every time we see a
- X newline. The final argument, if non-NULL, points to a flag that will
- X be set if further examination by a backtracking matcher is needed in
- X order to verify backreferencing; otherwise the flag will be cleared.
- X Returns NULL if no match is found, or a pointer to the first
- X character after the first & shortest matching string in the buffer. */
- Xextern char *regexecute(struct regexp *, char *, char *, int, int *, int *);
- X
- X/* Free the storage held by the components of a struct regexp. */
- Xextern void regfree(struct regexp *);
- X
- X/* Entry points for people who know what they're doing. */
- X
- X/* Initialize the components of a struct regexp. */
- Xextern void reginit(struct regexp *);
- X
- X/* Incrementally parse a string of given length into a struct regexp. */
- Xextern void regparse(const char *, size_t, struct regexp *);
- X
- X/* Analyze a parsed regexp; second argument tells whether to build a searching
- X or an exact matcher. */
- Xextern void reganalyze(struct regexp *, int);
- X
- X/* Compute, for each possible character, the transitions out of a given
- X state, storing them in an array of integers. */
- Xextern void regstate(int, struct regexp *, int []);
- X
- X/* Error handling. */
- X
- X/* Regerror() is called by the regexp routines whenever an error occurs. It
- X takes a single argument, a NUL-terminated string describing the error.
- X The default regerror() prints the error message to stderr and exits.
- X The user can provide a different regfree() if so desired. */
- Xextern void regerror(const char *);
- X
- X#else /* ! __STDC__ */
- Xextern void regsyntax(), regcompile(), regfree(), reginit(), regparse();
- Xextern void reganalyze(), regstate(), regerror();
- Xextern char *regexecute();
- X#endif
- END_OF_FILE
- if test 22427 -ne `wc -c <'dfa.h'`; then
- echo shar: \"'dfa.h'\" unpacked with wrong size!
- fi
- # end of 'dfa.h'
- fi
- if test -f 'grep.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'grep.c'\"
- else
- echo shar: Extracting \"'grep.c'\" \(28346 characters\)
- sed "s/^X//" >'grep.c' <<'END_OF_FILE'
- X/* grep - print lines matching an extended regular expression
- X Copyright (C) 1988 Free Software Foundation, Inc.
- X Written June, 1988 by Mike Haertel
- X BMG speedups added July, 1988
- X by James A. Woods and Arthur David Olson
- X
- X NO WARRANTY
- X
- X BECAUSE THIS PROGRAM IS LICENSED FREE OF CHARGE, WE PROVIDE ABSOLUTELY
- XNO WARRANTY, TO THE EXTENT PERMITTED BY APPLICABLE STATE LAW. EXCEPT
- XWHEN OTHERWISE STATED IN WRITING, FREE SOFTWARE FOUNDATION, INC,
- XRICHARD M. STALLMAN AND/OR OTHER PARTIES PROVIDE THIS PROGRAM "AS IS"
- XWITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
- XBUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- XFITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY
- XAND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
- XDEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
- XCORRECTION.
- X
- X IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL RICHARD M.
- XSTALLMAN, THE FREE SOFTWARE FOUNDATION, INC., AND/OR ANY OTHER PARTY
- XWHO MAY MODIFY AND REDISTRIBUTE THIS PROGRAM AS PERMITTED BELOW, BE
- XLIABLE TO YOU FOR DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR
- XOTHER SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
- XUSE OR INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR
- XDATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR
- XA FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS) THIS
- XPROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH
- XDAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- X
- X GENERAL PUBLIC LICENSE TO COPY
- X
- X 1. You may copy and distribute verbatim copies of this source file
- Xas you receive it, in any medium, provided that you conspicuously and
- Xappropriately publish on each copy a valid copyright notice "Copyright
- X (C) 1988 Free Software Foundation, Inc."; and include following the
- Xcopyright notice a verbatim copy of the above disclaimer of warranty
- Xand of this License. You may charge a distribution fee for the
- Xphysical act of transferring a copy.
- X
- X 2. You may modify your copy or copies of this source file or
- Xany portion of it, and copy and distribute such modifications under
- Xthe terms of Paragraph 1 above, provided that you also do the following:
- X
- X a) cause the modified files to carry prominent notices stating
- X that you changed the files and the date of any change; and
- X
- X b) cause the whole of any work that you distribute or publish,
- X that in whole or in part contains or is a derivative of this
- X program or any part thereof, to be licensed at no charge to all
- X third parties on terms identical to those contained in this
- X License Agreement (except that you may choose to grant more extensive
- X warranty protection to some or all third parties, at your option).
- X
- X c) You may charge a distribution fee for the physical act of
- X transferring a copy, and you may at your option offer warranty
- X protection in exchange for a fee.
- X
- XMere aggregation of another unrelated program with this program (or its
- Xderivative) on a volume of a storage or distribution medium does not bring
- Xthe other program under the scope of these terms.
- X
- X 3. You may copy and distribute this program or any portion of it in
- Xcompiled, executable or object code form under the terms of Paragraphs
- X1 and 2 above provided that you do the following:
- X
- X a) accompany it with the complete corresponding machine-readable
- X source code, which must be distributed under the terms of
- X Paragraphs 1 and 2 above; or,
- X
- X b) accompany it with a written offer, valid for at least three
- X years, to give any third party free (except for a nominal
- X shipping charge) a complete machine-readable copy of the
- X corresponding source code, to be distributed under the terms of
- X Paragraphs 1 and 2 above; or,
- X
- X c) accompany it with the information you received as to where the
- X corresponding source code may be obtained. (This alternative is
- X allowed only for noncommercial distribution and only if you
- X received the program in object code or executable form alone.)
- X
- XFor an executable file, complete source code means all the source code for
- Xall modules it contains; but, as a special exception, it need not include
- Xsource code for modules which are standard libraries that accompany the
- Xoperating system on which the executable file runs.
- X
- X 4. You may not copy, sublicense, distribute or transfer this program
- Xexcept as expressly provided under this License Agreement. Any attempt
- Xotherwise to copy, sublicense, distribute or transfer this program is void and
- Xyour rights to use the program under this License agreement shall be
- Xautomatically terminated. However, parties who have received computer
- Xsoftware programs from you with this License Agreement will not have
- Xtheir licenses terminated so long as such parties remain in full compliance.
- X
- X 5. If you wish to incorporate parts of this program into other free
- Xprograms whose distribution conditions are different, write to the Free
- XSoftware Foundation at 675 Mass Ave, Cambridge, MA 02139. We have not yet
- Xworked out a simple rule that can be stated here, but we will often permit
- Xthis. We will be guided by the two goals of preserving the free status of
- Xall derivatives our free software and of promoting the sharing and reuse of
- Xsoftware.
- X
- X
- XIn other words, you are welcome to use, share and improve this program.
- XYou are forbidden to forbid anyone else to use, share and improve
- Xwhat you give them. Help stamp out software-hoarding! */
- X
- X#include <ctype.h>
- X#include <stdio.h>
- X#ifdef USG
- X#include <memory.h>
- X#include <string.h>
- X#else
- X#include <strings.h>
- X#endif
- X#include "dfa.h"
- X#include "regex.h"
- X
- X#ifdef __STDC__
- Xextern getopt(int, char **, const char *);
- Xextern read(int, void *, int);
- Xextern open(const char *, int, ...);
- Xextern void close();
- X#else
- Xextern char *strrchr();
- X#endif
- X
- Xextern char *optarg;
- Xextern optind, opterr;
- Xextern errno;
- Xextern char *sys_errlist[];
- X
- X#define MAX(a, b) ((a) > (b) ? (a) : (b))
- X
- X/* Exit status codes. */
- X#define MATCHES_FOUND 0 /* Exit 0 if no errors and matches found. */
- X#define NO_MATCHES_FOUND 1 /* Exit 1 if no matches were found. */
- X#define ERROR 2 /* Exit 2 if some error occurred. */
- X
- X/* Error is set true if something awful happened. */
- Xstatic int error;
- X
- X/* The program name for error messages. */
- Xstatic char *prog;
- X
- X/* We do all our own buffering by hand for efficiency. */
- Xstatic char *buffer; /* The buffer itself, grown as needed. */
- Xstatic bufbytes; /* Number of bytes in the buffer. */
- Xstatic size_t bufalloc; /* Number of bytes allocated to the buffer. */
- Xstatic bufprev; /* Number of bytes that have been forgotten.
- X This is used to get byte offsets from the
- X beginning of the file. */
- Xstatic bufread; /* Number of bytes to get with each read(). */
- X
- Xstatic void
- Xinitialize_buffer()
- X{
- X bufread = 8192;
- X bufalloc = bufread + bufread / 2;
- X buffer = malloc(bufalloc);
- X if (! buffer)
- X {
- X fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
- X sys_errlist[errno]);
- X exit(ERROR);
- X }
- X}
- X
- X/* The current input file. */
- Xstatic fd;
- Xstatic char *filename;
- Xstatic eof;
- X
- X/* Fill the buffer retaining the last n bytes at the beginning of the
- X newly filled buffer (for backward context). Returns the number of new
- X bytes read from disk. */
- Xstatic
- Xfill_buffer_retaining(n)
- X int n;
- X{
- X char *p, *q;
- X int i;
- X
- X /* See if we need to grow the buffer. */
- X if (bufalloc - n <= bufread)
- X {
- X while (bufalloc - n <= bufread)
- X {
- X bufalloc *= 2;
- X bufread *= 2;
- X }
- X buffer = realloc(buffer, bufalloc);
- X if (! buffer)
- X {
- X fprintf(stderr, "%s: Memory exhausted (%s)\n", prog,
- X sys_errlist[errno]);
- X exit(ERROR);
- X }
- X }
- X
- X bufprev += bufbytes - n;
- X
- X /* Shift stuff down. */
- X for (i = n, p = buffer, q = p + bufbytes - n; i--; )
- X *p++ = *q++;
- X bufbytes = n;
- X
- X if (eof)
- X return 0;
- X
- X /* Read in new stuff. */
- X i = read(fd, buffer + bufbytes, bufread);
- X if (i < 0)
- X {
- X fprintf(stderr, "%s: read on %s failed (%s)\n", prog,
- X filename ? filename : "<stdin>", sys_errlist[errno]);
- X error = 1;
- X }
- X
- X /* Kludge to pretend every nonempty file ends with a newline. */
- X if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
- X {
- X eof = i = 1;
- X buffer[bufbytes] = '\n';
- X }
- X
- X bufbytes += i;
- X return i;
- X}
- X
- X/* Various flags set according to the argument switches. */
- Xstatic trailing_context; /* Lines of context to show after matches. */
- Xstatic leading_context; /* Lines of context to show before matches. */
- Xstatic byte_count; /* Precede output lines the byte count of the
- X first character on the line. */
- Xstatic no_filenames; /* Do not display filenames. */
- Xstatic line_numbers; /* Precede output lines with line numbers. */
- Xstatic silent; /* Produce no output at all. This switch
- X is bogus, ever hear of /dev/null? */
- Xstatic nonmatching_lines; /* Print lines that don't match the regexp. */
- X
- Xstatic bmgexec; /* Invoke Boyer-Moore-Gosper routines */
- X
- X/* The compiled regular expression lives here. */
- Xstatic struct regexp reg;
- X
- X/* The compiled regular expression for the backtracking matcher lives here. */
- Xstatic struct re_pattern_buffer regex;
- X
- X/* Pointer in the buffer after the last character printed. */
- Xstatic char *printed_limit;
- X
- X/* True when printed_limit has been artifically advanced without printing
- X anything. */
- Xstatic int printed_limit_fake;
- X
- X/* Print a line at the given line number, returning the number of
- X characters actually printed. Matching is true if the line is to
- X be considered a "matching line". This is only meaningful if
- X surrounding context is turned on. */
- Xstatic
- Xprint_line(p, number, matching)
- X char *p;
- X int number;
- X int matching;
- X{
- X int count = 0;
- X
- X if (silent)
- X {
- X do
- X ++count;
- X while (*p++ != '\n');
- X printed_limit_fake = 0;
- X printed_limit = p;
- X return count;
- X }
- X
- X if (filename && !no_filenames)
- X printf("%s%c", filename, matching ? ':' : '-');
- X if (byte_count)
- X printf("%d%c", p - buffer + bufprev, matching ? ':' : '-');
- X if (line_numbers)
- X printf("%d%c", number, matching ? ':' : '-');
- X do
- X {
- X ++count;
- X putchar(*p);
- X }
- X while (*p++ != '\n');
- X printed_limit_fake = 0;
- X printed_limit = p;
- X return count;
- X}
- X
- X/* Print matching or nonmatching lines from the current file. Returns a
- X count of matching or nonmatching lines. */
- Xstatic
- Xgrep()
- X{
- X int retain = 0; /* Number of bytes to retain on next call
- X to fill_buffer_retaining(). */
- X char *search_limit; /* Pointer to the character after the last
- X newline in the buffer. */
- X char saved_char; /* Character after the last newline. */
- X char *resume; /* Pointer to where to resume search. */
- X int resume_index = 0; /* Count of characters to ignore after
- X refilling the buffer. */
- X int line_count = 1; /* Line number. */
- X int try_backref; /* Set to true if we need to verify the
- X match with a backtracking matcher. */
- X int initial_line_count; /* Line count at beginning of last search. */
- X char *match; /* Pointer to the first character after the
- X string matching the regexp. */
- X int match_count = 0; /* Count of matching lines. */
- X char *matching_line; /* Pointer to first character of the matching
- X line, or of the first line of context to
- X print if context is turned on. */
- X char *real_matching_line; /* Pointer to the first character of the
- X real matching line. */
- X char *next_line; /* Pointer to first character of the line
- X following the matching line. */
- X int pending_lines = 0; /* Lines of context left over from last match
- X that we have to print. */
- X static first_match = 1; /* True when nothing has been printed. */
- X int i;
- X char *tmp;
- X char *execute();
- X
- X printed_limit_fake = 0;
- X
- X while (fill_buffer_retaining(retain) > 0)
- X {
- X /* Find the last newline in the buffer. */
- X search_limit = buffer + bufbytes;
- X while (search_limit > buffer && search_limit[-1] != '\n')
- X --search_limit;
- X if (search_limit == buffer)
- X {
- X retain = bufbytes;
- X continue;
- X }
- X
- X /* Save the character after the last newline so regexecute can write
- X its own sentinel newline. */
- X saved_char = *search_limit;
- X
- X /* Search the buffer for a match. */
- X printed_limit = buffer;
- X resume = buffer + resume_index;
- X initial_line_count = line_count;
- X
- X while (match = execute(®, resume, search_limit, 0, &line_count, &try_backref))
- X {
- X ++match_count;
- X
- X /* Find the beginning of the matching line. */
- X matching_line = match;
- X while (matching_line > resume && matching_line[-1] != '\n')
- X --matching_line;
- X real_matching_line = matching_line;
- X
- X /* Find the beginning of the next line. */
- X next_line = match;
- X while (next_line < search_limit && *next_line++ != '\n')
- X ;
- X
- X /* If a potential backreference is indicated, try it out with
- X a backtracking matcher to make sure the line is a match. */
- X if (try_backref && re_search(®ex, matching_line,
- X next_line - matching_line,
- X 0,
- X next_line - matching_line,
- X NULL) < 0)
- X {
- X resume = next_line;
- X if (resume == search_limit)
- X break;
- X else
- X continue;
- X }
- X
- X /* Print leftover lines from last time. If nonmatching_lines is
- X turned on, print these as if they were matching lines. */
- X while (resume < matching_line && pending_lines)
- X {
- X resume += print_line(resume, initial_line_count++,
- X nonmatching_lines);
- X --pending_lines;
- X }
- X
- X /* Print out the matching or nonmatching lines as necessary. */
- X if (! nonmatching_lines)
- X {
- X /* Back up over leading context if necessary. */
- X for (i = leading_context; matching_line > printed_limit
- X && i; --i)
- X {
- X while (matching_line > printed_limit
- X && (--matching_line)[-1] != '\n')
- X ;
- X --line_count;
- X }
- X
- X /* If context is enabled, we may have to print a separator. */
- X if ((leading_context || trailing_context) && !silent
- X && !first_match && (printed_limit_fake || matching_line
- X > printed_limit))
- X printf("----------\n");
- X first_match = 0;
- X
- X /* Print the matching line and its leading context. */
- X while (matching_line < real_matching_line)
- X matching_line += print_line(matching_line, line_count++, 0);
- X matching_line += print_line(matching_line, line_count++, 1);
- X
- X /* If there's trailing context, leave some lines pending until
- X next time. */
- X pending_lines = trailing_context;
- X }
- X else if (matching_line > resume)
- X {
- X char *real_resume = resume;
- X
- X /* Back up over leading context if necessary. */
- X for (i = leading_context; resume > printed_limit && i; --i)
- X {
- X while (resume > printed_limit && (--resume)[-1] != '\n')
- X ;
- X --initial_line_count;
- X }
- X
- X /* If context is enabled, we may have to print a separator. */
- X if ((leading_context || trailing_context) && !silent
- X && !first_match && (printed_limit_fake || resume
- X > printed_limit))
- X printf("----------\n");
- X first_match = 0;
- X
- X /* Print out the presumably matching leading context. */
- X while (resume < real_resume)
- X resume += print_line(resume, initial_line_count++, 0);
- X
- X /* Print out the nonmatching lines prior to the matching line. */
- X while (resume < matching_line)
- X resume += print_line(resume, initial_line_count++, 1);
- X
- X /* Deal with trailing context. */
- X if (trailing_context)
- X {
- X print_line(matching_line, line_count, 0);
- X pending_lines = trailing_context - 1;
- X }
- X
- X /* Count the current line. */
- X ++line_count;
- X }
- X else
- X {
- X /* The line immediately after a matching line has to be printed
- X because it was pending. */
- X if (pending_lines > 0)
- X {
- X --pending_lines;
- X print_line(matching_line, line_count, 0);
- X }
- X ++line_count;
- X }
- X
- X /* Resume searching at the beginning of the next line. */
- X initial_line_count = line_count;
- X resume = next_line;
- X
- X if (resume == search_limit)
- X break;
- X }
- X
- X /* Restore the saved character. */
- X *search_limit = saved_char;
- X
- X if (! nonmatching_lines)
- X {
- X while (resume < search_limit && pending_lines)
- X {
- X resume += print_line(resume, initial_line_count++, 0);
- X --pending_lines;
- X }
- X }
- X else if (search_limit > resume)
- X {
- X char *initial_resume = resume;
- X
- X /* Back up over leading context if necessary. */
- X for (i = leading_context; resume > printed_limit && i; --i)
- X {
- X while (resume > printed_limit && (--resume)[-1] != '\n')
- X ;
- X --initial_line_count;
- X }
- X
- X /* If context is enabled, we may have to print a separator. */
- X if ((leading_context || trailing_context) && !silent
- X && !first_match && (printed_limit_fake || resume
- X > printed_limit))
- X printf("----------\n");
- X first_match = 0;
- X
- X /* Print out all the nonmatching lines up to the search limit. */
- X while (resume < initial_resume)
- X resume += print_line(resume, initial_line_count++, 0);
- X while (resume < search_limit)
- X resume += print_line(resume, initial_line_count++, 1);
- X
- X pending_lines = trailing_context;
- X resume_index = 0;
- X retain = bufbytes - (search_limit - buffer);
- X continue;
- X }
- X
- X /* Save the trailing end of the buffer for possible use as leading
- X context in the future. */
- X i = leading_context;
- X tmp = search_limit;
- X while (tmp > printed_limit && i--)
- X while (tmp > printed_limit && (--tmp)[-1] != '\n')
- X ;
- X resume_index = search_limit - tmp;
- X retain = bufbytes - (tmp - buffer);
- X if (tmp > printed_limit)
- X printed_limit_fake = 1;
- X }
- X
- X return nonmatching_lines ? line_count - match_count : match_count;
- X}
- X
- Xvoid
- Xusage_and_die()
- X{
- X fprintf(stderr,
- X"usage: %s [-CVbchilnsvwx] [-<num>] [-AB <num>] [-f file] [-e] expr [files]\n",
- X prog);
- X exit(ERROR);
- X}
- X
- Xstatic char version[] = "GNU e?grep, version 1.2";
- X
- Xmain(argc, argv)
- X int argc;
- X char **argv;
- X{
- X int c;
- X int ignore_case = 0; /* Compile the regexp to ignore case. */
- X char *the_regexp = 0; /* The regular expression. */
- X int regexp_len; /* Length of the regular expression. */
- X char *regexp_file = 0; /* File containing parallel regexps. */
- X int count_lines = 0; /* Display only a count of matching lines. */
- X int list_files = 0; /* Display only the names of matching files. */
- X int whole_word = 0; /* Insist that the regexp match a word only. */
- X int whole_line = 0; /* Insist on matching only whole lines. */
- X int line_count = 0; /* Count of matching lines for a file. */
- X int matches_found = 0; /* True if matches were found. */
- X char *regex_errmesg; /* Error message from regex routines. */
- X char translate[_NOTCHAR]; /* Translate table for case conversion
- X (needed by the backtracking matcher). */
- X
- X if (prog = strrchr(argv[0], '/'))
- X ++prog;
- X else
- X prog = argv[0];
- X
- X opterr = 0;
- X while ((c = getopt(argc, argv, "0123456789A:B:CVbce:f:hilnsvwx")) != EOF)
- X switch (c)
- X {
- X case '?':
- X usage_and_die();
- X break;
- X
- X case '0':
- X case '1':
- X case '2':
- X case '3':
- X case '4':
- X case '5':
- X case '6':
- X case '7':
- X case '8':
- X case '9':
- X trailing_context = 10 * trailing_context + c - '0';
- X leading_context = 10 * leading_context + c - '0';
- X break;
- X
- X case 'A':
- X if (! sscanf(optarg, "%d", &trailing_context)
- X || trailing_context < 0)
- X usage_and_die();
- X break;
- X
- X case 'B':
- X if (! sscanf(optarg, "%d", &leading_context)
- X || leading_context < 0)
- X usage_and_die();
- X break;
- X
- X case 'C':
- X trailing_context = leading_context = 2;
- X break;
- X
- X case 'V':
- X fprintf(stderr, "%s\n", version);
- X break;
- X
- X case 'b':
- X byte_count = 1;
- X break;
- X
- X case 'c':
- X count_lines = 1;
- X silent = 1;
- X break;
- X
- X case 'e':
- X /* It doesn't make sense to mix -f and -e. */
- X if (regexp_file)
- X usage_and_die();
- X the_regexp = optarg;
- X break;
- X
- X case 'f':
- X /* It doesn't make sense to mix -f and -e. */
- X if (the_regexp)
- X usage_and_die();
- X regexp_file = optarg;
- X break;
- X
- X case 'h':
- X no_filenames = 1;
- X break;
- X
- X case 'i':
- X ignore_case = 1;
- X for (c = 0; c < _NOTCHAR; ++c)
- X if (isupper(c))
- X translate[c] = tolower(c);
- X else
- X translate[c] = c;
- X regex.translate = translate;
- X break;
- X
- X case 'l':
- X list_files = 1;
- X silent = 1;
- X break;
- X
- X case 'n':
- X line_numbers = 1;
- X break;
- X
- X case 's':
- X silent = 1;
- X break;
- X
- X case 'v':
- X nonmatching_lines = 1;
- X break;
- X
- X case 'w':
- X whole_word = 1;
- X break;
- X
- X case 'x':
- X whole_line = 1;
- X break;
- X
- X default:
- X /* This can't happen. */
- X fprintf(stderr, "%s: getopt(3) let one by!\n", prog);
- X usage_and_die();
- X break;
- X }
- X
- X /* Set the syntax depending on arg 0 and whether to ignore case. */
- X if (*prog == 'e')
- X {
- X regsyntax(RE_SYNTAX_EGREP, ignore_case);
- X re_set_syntax(RE_SYNTAX_EGREP);
- X }
- X else
- X {
- X regsyntax(RE_SYNTAX_GREP, ignore_case);
- X re_set_syntax(RE_SYNTAX_GREP);
- X }
- X
- X /* Compile the regexp according to all the options. */
- X if (regexp_file)
- X {
- X FILE *fp = fopen(regexp_file, "r");
- X int len = 256;
- X int i = 0;
- X
- X if (! fp)
- X {
- X fprintf(stderr, "%s: %s: %s\n", prog, regexp_file,
- X sys_errlist[errno]);
- X exit(ERROR);
- X }
- X
- X the_regexp = malloc(len);
- X while ((c = getc(fp)) != EOF)
- X {
- X the_regexp[i++] = c;
- X if (i == len)
- X the_regexp = realloc(the_regexp, len *= 2);
- X }
- X /* Nuke the concluding newline so we won't match the empty string. */
- X if (i > 0 && the_regexp[i - 1] == '\n')
- X --i;
- X regexp_len = i;
- X }
- X else if (! the_regexp)
- X {
- X if (optind >= argc)
- X usage_and_die();
- X the_regexp = argv[optind++];
- X regexp_len = strlen(the_regexp);
- X }
- X else
- X regexp_len = strlen(the_regexp);
- X
- X if (whole_word || whole_line)
- X {
- X char *n = malloc(regexp_len + 8);
- X int i = 0;
- X
- X if (whole_line)
- X n[i++] = '^';
- X else
- X n[i++] = '\\', n[i++] = '<';
- X if (*prog != 'e')
- X n[i++] = '\\';
- X n[i++] = '(';
- X memcpy(n + i, the_regexp, regexp_len);
- X i += regexp_len;
- X if (*prog != 'e')
- X n[i++] = '\\';
- X n[i++] = ')';
- X if (whole_line)
- X n[i++] = '$';
- X else
- X n[i++] = '\\', n[i++] = '>';
- X the_regexp = n;
- X regexp_len = i;
- X }
- X
- X regcompile(the_regexp, regexp_len, ®, 1);
- X
- X if (regex_errmesg = re_compile_pattern(the_regexp, regexp_len, ®ex))
- X regerror(regex_errmesg);
- X
- X /*
- X Find the longest metacharacter-free string which must occur in the
- X regexpr, before short-circuiting regexecute() with Boyer-Moore-Gosper.
- X (Conjecture: The problem in general is NP-complete.) If there is no
- X such string (like for many alternations), then default to full automaton
- X search. regmust() code and heuristics [see dfa.c] courtesy
- X Arthur David Olson.
- X */
- X if (line_numbers == 0 && nonmatching_lines == 0)
- X {
- X if (reg.mustn == 0 || reg.mustn == MUST_MAX ||
- X strchr(reg.must, '\0') != reg.must + reg.mustn)
- X bmgexec = 0;
- X else
- X {
- X reg.must[reg.mustn] = '\0';
- X if (getenv("MUSTDEBUG") != NULL)
- X (void) printf("must have: \"%s\"\n", reg.must);
- X bmg_setup(reg.must, ignore_case);
- X bmgexec = 1;
- X }
- X }
- X
- X if (argc - optind < 2)
- X no_filenames = 1;
- X
- X initialize_buffer();
- X
- X if (argc > optind)
- X while (optind < argc)
- X {
- X bufprev = eof = 0;
- X filename = argv[optind++];
- X fd = open(filename, 0, 0);
- X if (fd < 0)
- X {
- X fprintf(stderr, "%s: %s: %s\n", prog, filename,
- X sys_errlist[errno]);
- X error = 1;
- X continue;
- X }
- X if (line_count = grep())
- X matches_found = 1;
- X close(fd);
- X if (count_lines)
- X if (!no_filenames)
- X printf("%s:%d\n", filename, line_count);
- X else
- X printf("%d\n", line_count);
- X else if (list_files && line_count)
- X printf("%s\n", filename);
- X }
- X else
- X {
- X if (line_count = grep())
- X matches_found = 1;
- X if (count_lines)
- X printf("%d\n", line_count);
- X else if (list_files && line_count)
- X printf("<stdin>\n");
- X }
- X
- X if (error)
- X exit(ERROR);
- X if (matches_found)
- X exit(MATCHES_FOUND);
- X exit(NO_MATCHES_FOUND);
- X}
- X
- X/* Needed by the regexp routines. This could be fancier, especially when
- X dealing with parallel regexps in files. */
- Xvoid
- Xregerror(s)
- X const char *s;
- X{
- X fprintf(stderr, "%s: %s\n", prog, s);
- X exit(ERROR);
- X}
- X
- X/*
- X bmg_setup() and bmg_search() adapted from:
- X Boyer/Moore/Gosper-assisted 'egrep' search, with delta0 table as in
- X original paper (CACM, October, 1977). No delta1 or delta2. According to
- X experiment (Horspool, Soft. Prac. Exp., 1982), delta2 is of minimal
- X practical value. However, to improve for worst case input, integrating
- X the improved Galil strategies (Apostolico/Giancarlo, Siam. J. Comput.,
- X February 1986) deserves consideration.
- X
- X James A. Woods Copyleft (C) 1986, 1988
- X NASA Ames Research Center
- X*/
- X
- Xchar *
- Xexecute(r, begin, end, newline, count, try_backref)
- X struct regexp *r;
- X char *begin;
- X char *end;
- X int newline;
- X int *count;
- X int *try_backref;
- X{
- X register char *p, *s;
- X char *match;
- X char *start = begin;
- X char save; /* regexecute() sentinel */
- X int len;
- X char *bmg_search();
- X
- X if (!bmgexec) /* full automaton search */
- X return(regexecute(r, begin, end, newline, count, try_backref));
- X else
- X {
- X len = end - begin;
- X while ((match = bmg_search((unsigned char *) start, len)) != NULL)
- X {
- X p = match; /* narrow search range to submatch line */
- X while (p > begin && *p != '\n')
- X p--;
- X s = match;
- X while (s < end && *s != '\n')
- X s++;
- X s++;
- X
- X save = *s;
- X *s = '\0';
- X match = regexecute(r, p, s, newline, count, try_backref);
- X *s = save;
- X
- X if (match != NULL)
- X return((char *) match);
- X else
- X {
- X start = s;
- X len = end - start;
- X }
- X }
- X return(NULL);
- X }
- X}
- X
- X#include <ctype.h>
- Xint delta0[256];
- Xunsigned char cmap[256]; /* (un)folded characters */
- Xunsigned char pattern[5000];
- Xint patlen;
- X
- Xchar *
- Xbmg_search(buffer, buflen)
- X unsigned char *buffer;
- X int buflen;
- X{
- X register unsigned char *k, *strend, *s, *buflim;
- X register int t;
- X int j;
- X
- X if (patlen > buflen)
- X return NULL;
- X
- X buflim = buffer + buflen;
- X if (buflen > patlen * 4)
- X strend = buflim - patlen * 4;
- X else
- X strend = buffer;
- X
- X s = buffer;
- X k = buffer + patlen - 1;
- X
- X for (;;)
- X {
- X /* The dreaded inner loop, revisited. */
- X while (k < strend && (t = delta0[*k]))
- X {
- X k += t;
- X k += delta0[*k];
- X k += delta0[*k];
- X }
- X while (k < buflim && delta0[*k])
- X ++k;
- X if (k == buflim)
- X break;
- X
- X j = patlen - 1;
- X s = k;
- X while (cmap[*--s] == pattern[--j])
- X ;
- X /*
- X delta-less shortcut for literati, but
- X short shrift for genetic engineers.
- X */
- X if (j >= 0)
- X k++;
- X else /* submatch */
- X return ((char *)k);
- X }
- X return(NULL);
- X}
- X
- Xbmg_setup(pat, folded) /* compute "boyer-moore" delta table */
- X char *pat;
- X int folded;
- X{ /* ... HAKMEM lives ... */
- X int j;
- X
- X patlen = strlen(pat);
- X
- X if (folded) /* fold case while saving pattern */
- X for (j = 0; j < patlen; j++)
- X pattern[j] = (isupper((int) pat[j]) ?
- X (char) tolower((int) pat[j]) : pat[j]);
- X else
- X memcpy(pattern, pat, patlen);
- X
- X for (j = 0; j < 256; j++)
- X {
- X delta0[j] = patlen;
- X cmap[j] = (char) j; /* could be done at compile time */
- X }
- X for (j = 0; j < patlen - 1; j++)
- X delta0[pattern[j]] = patlen - j - 1;
- X delta0[pattern[patlen - 1]] = 0;
- X
- X if (folded)
- X {
- X for (j = 0; j < patlen - 1; j++)
- X if (islower((int) pattern[j]))
- X delta0[toupper((int) pattern[j])] = patlen - j - 1;
- X if (islower((int) pattern[patlen - 1]))
- X delta0[toupper((int) pattern[patlen - 1])] = 0;
- X for (j = 'A'; j <= 'Z'; j++)
- X cmap[j] = (char) tolower((int) j);
- X }
- X}
- X
- X#ifndef USG
- X
- X/* (groan) compatibility */
- X
- Xchar *
- Xstrchr(s, c)
- X char *s;
- X{
- X return index(s, c);
- X}
- X
- Xchar *
- Xstrrchr(s, c)
- X char *s;
- X{
- X return rindex(s, c);
- X}
- X
- Xchar *
- Xmemcpy(d, s, n)
- X char *d, *s;
- X{
- X return bcopy(s, d, n);
- X}
- X
- X#else
- X
- Xchar *
- Xindex(s, c)
- X char *s;
- X{
- X return strchr(s, c);
- X}
- X
- Xchar *
- Xbcopy(s, d, n)
- X char *s, *d;
- X{
- X return memcpy(d, s, n);
- X}
- X
- Xchar *
- Xbzero(s, n)
- X char *s;
- X{
- X return memset(s, 0, n);
- X}
- X
- Xbcmp(s, t, n)
- X char *s, *t;
- X{
- X return memcmp(s, t, n);
- X}
- X
- X#endif
- END_OF_FILE
- if test 28346 -ne `wc -c <'grep.c'`; then
- echo shar: \"'grep.c'\" unpacked with wrong size!
- fi
- # end of 'grep.c'
- fi
- echo shar: End of shell archive.
- exit 0
-
-